From 78e09264ad9836fa47610399e4a5b4d3cd62caf5 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Tue, 22 Sep 2009 08:19:16 +0100 Subject: [PATCH] x86: map M2P table sparsely Avoid backing M2P table holes with memory, when those holes are large enough to cover an exact multiple of large pages. For the sake of saving and migrating guests, XENMEM_machphys_mfn_list fills the holes in the array it returns with the MFN for the previous range returned (thanks to Keir pointing out that it really doesn't matter *what* MFN gets returned for invalid ranges). Using the most recently encountered MFN (rather than e.g. always the first one) represents an attempt to cut down on the number of references these pages will get when they get mapped into a privileged domain's address space. This also allows for saving a couple of 2M pages even on certain "normal" systems. Signed-off-by: Jan Beulich --- xen/arch/x86/x86_32/mm.c | 43 ++++++++--- xen/arch/x86/x86_64/compat/mm.c | 23 ++++-- xen/arch/x86/x86_64/mm.c | 126 ++++++++++++++++++++++---------- 3 files changed, 137 insertions(+), 55 deletions(-) diff --git a/xen/arch/x86/x86_32/mm.c b/xen/arch/x86/x86_32/mm.c index 09b65a3b0c..5c80f0dff3 100644 --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c @@ -72,7 +72,7 @@ void __init paging_init(void) { unsigned long v; struct page_info *pg; - int i; + unsigned int i, n; if ( cpu_has_pge ) { @@ -96,8 +96,18 @@ void __init paging_init(void) */ mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); +#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long)) +#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ + sizeof(*machine_to_phys_mapping)) + BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \ + sizeof(*machine_to_phys_mapping)); for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { + for ( n = 0; n < CNT; ++n) + if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) ) + break; + if ( n == CNT ) + continue; if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL ) panic("Not enough memory to bootstrap Xen.\n"); l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i], @@ -106,11 +116,12 @@ void __init paging_init(void) l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i], l2e_from_page( pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW)); + /* Fill with an obvious debug pattern. */ + memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55, + 1UL << L2_PAGETABLE_SHIFT); } - - /* Fill with an obvious debug pattern. */ - for ( i = 0; i < (mpt_size / BYTES_PER_LONG); i++) - set_gpfn_from_mfn(i, 0x55555555); +#undef CNT +#undef MFN /* Create page tables for ioremap()/map_domain_page_global(). */ for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ ) @@ -163,14 +174,17 @@ void __init subarch_init_memory(void) { unsigned long m2p_start_mfn; unsigned int i, j; + l2_pgentry_t l2e; BUILD_BUG_ON(sizeof(struct page_info) != 24); /* M2P table is mappable read-only by privileged domains. */ for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { - m2p_start_mfn = l2e_get_pfn( - idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]); + l2e = idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i]; + if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) + continue; + m2p_start_mfn = l2e_get_pfn(l2e); for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ ) { struct page_info *page = mfn_to_page(m2p_start_mfn + j); @@ -191,8 +205,9 @@ void __init subarch_init_memory(void) long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) { struct xen_machphys_mfn_list xmml; - unsigned long mfn; + unsigned long mfn, last_mfn; unsigned int i, max; + l2_pgentry_t l2e; long rc = 0; switch ( op ) @@ -203,12 +218,18 @@ long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21); - for ( i = 0; i < max; i++ ) + for ( i = 0, last_mfn = 0; i < max; i++ ) { - mfn = l2e_get_pfn(idle_pg_table_l2[l2_linear_offset( - RDWR_MPT_VIRT_START + (i << 21))]) + l1_table_offset(i << 21); + l2e = idle_pg_table_l2[l2_linear_offset( + RDWR_MPT_VIRT_START + (i << 21))]; + if ( l2e_get_flags(l2e) & _PAGE_PRESENT ) + mfn = l2e_get_pfn(l2e); + else + mfn = last_mfn; + ASSERT(mfn); if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) ) return -EFAULT; + last_mfn = mfn; } xmml.nr_extents = i; diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c index 8cfe95a25e..e444aa097b 100644 --- a/xen/arch/x86/x86_64/compat/mm.c +++ b/xen/arch/x86/x86_64/compat/mm.c @@ -153,19 +153,31 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) } case XENMEM_machphys_mfn_list: + { + unsigned long limit; + compat_pfn_t last_mfn; + if ( copy_from_guest(&xmml, arg, 1) ) return -EFAULT; - for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START; - (i != xmml.max_extents) && (v != RDWR_COMPAT_MPT_VIRT_END); + limit = (unsigned long)(compat_machine_to_phys_mapping + + min_t(unsigned long, max_page, + MACH2PHYS_COMPAT_NR_ENTRIES(current->domain))); + if ( limit > RDWR_COMPAT_MPT_VIRT_END ) + limit = RDWR_COMPAT_MPT_VIRT_END; + for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START, last_mfn = 0; + (i != xmml.max_extents) && (v < limit); i++, v += 1 << L2_PAGETABLE_SHIFT ) { l2e = compat_idle_pg_table_l2[l2_table_offset(v)]; - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - break; - mfn = l2e_get_pfn(l2e) + l1_table_offset(v); + if ( l2e_get_flags(l2e) & _PAGE_PRESENT ) + mfn = l2e_get_pfn(l2e); + else + mfn = last_mfn; + ASSERT(mfn); if ( copy_to_compat_offset(xmml.extent_start, i, &mfn, 1) ) return -EFAULT; + last_mfn = mfn; } xmml.nr_extents = i; @@ -173,6 +185,7 @@ int compat_arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) rc = -EFAULT; break; + } default: rc = -ENOSYS; diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 98d85d4cb0..87a2a3be32 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -194,7 +194,7 @@ void __init pfn_pdx_hole_setup(unsigned long mask) void __init paging_init(void) { unsigned long i, mpt_size, va; - unsigned int memflags; + unsigned int n, memflags; l3_pgentry_t *l3_ro_mpt; l2_pgentry_t *l2_ro_mpt = NULL; struct page_info *l1_pg, *l2_pg, *l3_pg; @@ -213,6 +213,11 @@ void __init paging_init(void) */ mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); +#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long)) +#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ + sizeof(*machine_to_phys_mapping)) + BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \ + sizeof(*machine_to_phys_mapping)); for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) { BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1)); @@ -222,37 +227,63 @@ void __init paging_init(void) if ( cpu_has_page1gb && !((unsigned long)l2_ro_mpt & ~PAGE_MASK) && - (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) && - (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER, - memflags)) != NULL ) + (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) ) + { + unsigned int k, holes; + + for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k) + { + for ( n = 0; n < CNT; ++n) + if ( mfn_valid(MFN(i + k) + n * PDX_GROUP_COUNT) ) + break; + if ( n == CNT ) + ++holes; + } + if ( k == holes ) + { + i += (1UL << PAGETABLE_ORDER) - 1; + continue; + } + if ( holes == 0 && + (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER, + memflags)) != NULL ) + { + map_pages_to_xen( + RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), + page_to_mfn(l1_pg), + 1UL << (2 * PAGETABLE_ORDER), + PAGE_HYPERVISOR); + memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), + 0x77, 1UL << L3_PAGETABLE_SHIFT); + + ASSERT(!l2_table_offset(va)); + /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ + l3e_write(&l3_ro_mpt[l3_table_offset(va)], + l3e_from_page(l1_pg, + /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); + i += (1UL << PAGETABLE_ORDER) - 1; + continue; + } + } + + for ( n = 0; n < CNT; ++n) + if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) ) + break; + if ( n == CNT ) + l1_pg = NULL; + else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, + memflags)) == NULL ) + goto nomem; + else { map_pages_to_xen( RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), page_to_mfn(l1_pg), - 1UL << (2 * PAGETABLE_ORDER), + 1UL << PAGETABLE_ORDER, PAGE_HYPERVISOR); memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), - 0x77, 1UL << L3_PAGETABLE_SHIFT); - - ASSERT(!l2_table_offset(va)); - /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ - l3e_write(&l3_ro_mpt[l3_table_offset(va)], - l3e_from_page(l1_pg, - /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); - i += (1UL << PAGETABLE_ORDER) - 1; - continue; + 0x55, 1UL << L2_PAGETABLE_SHIFT); } - - if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, - memflags)) == NULL ) - goto nomem; - map_pages_to_xen( - RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), - page_to_mfn(l1_pg), - 1UL << PAGETABLE_ORDER, - PAGE_HYPERVISOR); - memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55, - 1UL << L2_PAGETABLE_SHIFT); if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) ) { if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL ) @@ -264,10 +295,13 @@ void __init paging_init(void) ASSERT(!l2_table_offset(va)); } /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */ - l2e_write(l2_ro_mpt, l2e_from_page( - l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); + if ( l1_pg ) + l2e_write(l2_ro_mpt, l2e_from_page( + l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); l2_ro_mpt++; } +#undef CNT +#undef MFN /* Create user-accessible L2 directory to map the MPT for compat guests. */ BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) != @@ -288,12 +322,22 @@ void __init paging_init(void) mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END ) m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size; - for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) +#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int)) +#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ + sizeof(*compat_machine_to_phys_mapping)) + BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \ + sizeof(*compat_machine_to_phys_mapping)); + for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ ) { memflags = MEMF_node(phys_to_nid(i << (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT))); + for ( n = 0; n < CNT; ++n) + if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) ) + break; + if ( n == CNT ) + continue; if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, - memflags)) == NULL ) + memflags)) == NULL ) goto nomem; map_pages_to_xen( RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), @@ -306,8 +350,9 @@ void __init paging_init(void) 1UL << L2_PAGETABLE_SHIFT); /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */ l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT)); - l2_ro_mpt++; } +#undef CNT +#undef MFN /* Set up linear page table mapping. */ l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)], @@ -428,7 +473,7 @@ long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) l3_pgentry_t l3e; l2_pgentry_t l2e; unsigned long v; - xen_pfn_t mfn; + xen_pfn_t mfn, last_mfn; unsigned int i; long rc = 0; @@ -440,29 +485,32 @@ long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1)); BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1)); - for ( i = 0, v = RDWR_MPT_VIRT_START; - (i != xmml.max_extents) && (v != RDWR_MPT_VIRT_END); + for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0; + (i != xmml.max_extents) && + (v < (unsigned long)(machine_to_phys_mapping + max_page)); i++, v += 1UL << L2_PAGETABLE_SHIFT ) { l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ l3_table_offset(v)]; if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) - break; - if ( !(l3e_get_flags(l3e) & _PAGE_PSE) ) + mfn = last_mfn; + else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) ) { l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; - if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) - break; - mfn = l2e_get_pfn(l2e); + if ( l2e_get_flags(l2e) & _PAGE_PRESENT ) + mfn = l2e_get_pfn(l2e); + else + mfn = last_mfn; } else { mfn = l3e_get_pfn(l3e) + (l2_table_offset(v) << PAGETABLE_ORDER); } - ASSERT(!l1_table_offset(v)); + ASSERT(mfn); if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) ) return -EFAULT; + last_mfn = mfn; } xmml.nr_extents = i; -- 2.30.2